Get Eaton sequences for manual curation
# Locate the BLASTP result tables: every entry under `blastp_folder` whose
# path matches the glob "*_blastp.tsv".
files <- dir_ls(path = blastp_folder, glob = "*_blastp.tsv")
# Fail fast with a clear message when nothing matched. Without this check an
# empty file list yields an empty combined table, and the failure only shows
# up later as a missing-column error far from the real cause.
if (length(files) == 0) {
  stop("No '*_blastp.tsv' files found in: ", blastp_folder, call. = FALSE)
}
# Read one BLASTP tabular results file and tag every row with its source file.
#
# filename: path to a tab-separated BLASTP results file. Column names are
#   supplied here, so the first line of the file is read as data.
# Returns a tibble with the 13 BLASTP columns listed below plus `filename`,
#   which holds the file's base name (directory part removed).
read_and_record_filename <- function(filename) {
  blast_cols <- c(
    "qseqid", "sseqid", "evalue", "stitle", "pident", "length", "mismatch",
    "gapopen", "qstart", "qend", "sstart", "send", "bitscore"
  )
  read_tsv(
    filename,
    col_names = blast_cols,
    # Declare the column types instead of letting readr guess them per file:
    # ids and titles are text, every other field is numeric. This keeps the
    # types identical across files (so the tables can be stacked safely) and
    # stops readr printing a column-specification message for each file.
    col_types = cols(
      qseqid = col_character(),
      sseqid = col_character(),
      stitle = col_character(),
      .default = col_double()
    )
  ) %>%
    # `filename` on the right-hand side is the function argument; the table
    # has no column of that name until this step creates it.
    mutate(filename = path_file(filename))
}
# Read every results file and stack the rows into one table, then derive:
#   TSIV_ORF        - the text before the first "|" in qseqid, with the
#                     "gene_" prefix removed, converted to integer
#                     (qseqid itself is kept)
#   virus_accession - the text before the first "." in the recorded file
#                     name (this replaces the `filename` column)
blastp <- files %>%
  map_df(read_and_record_filename) %>%
  separate(
    qseqid,
    into = c("TSIV_ORF"),
    sep = "\\|",
    remove = FALSE,
    extra = "drop"
  ) %>%
  mutate(TSIV_ORF = as.integer(str_remove(TSIV_ORF, "gene_"))) %>%
  separate(
    filename,
    into = c("virus_accession"),
    sep = "\\.",
    extra = "drop"
  )
## Rows: 193 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 101 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 192 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 176 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 204 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 112 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 105 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 201 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 94 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 115 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 190 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 193 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 196 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 199 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 192 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 197 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 193 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 196 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 193 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 192 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 193 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 193 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 196 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 196 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 192 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 186 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 101 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 94 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 64 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 203 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 202 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 202 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 204 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 57 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 193 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 199 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 200 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 199 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 195 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 196 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 200 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 197 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 194 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 204 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 203 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 189 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 200 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 200 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 207 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 203 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 175 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 171 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 172 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 176 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 101 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 196 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 187 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 118 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 209 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 201 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (3): qseqid, sseqid, stitle
## dbl (10): evalue, pident, length, mismatch, gapopen, qstart, qend, sstart, s...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the combined BLASTP hits ordered by TSIV ORF
arrange(blastp, TSIV_ORF)
# Best hit per (TSIV ORF, virus accession) pair.
# Keep hits with e-value below 0.01, rank all of them by ascending e-value
# and then by descending bitscore, and take the first row of each pair.
top_blastp <- blastp %>%
  filter(evalue < 0.01) %>%
  arrange(evalue, desc(bitscore)) %>%
  group_by(TSIV_ORF, virus_accession) %>%
  slice(1) %>%
  ungroup() %>%
  arrange(TSIV_ORF)
top_blastp
# Core gene table: one row per entry of `core_genes`, with the best-hit
# subject id from each virus accession spread into its own column.
# `core_genes` is defined elsewhere; this step relies on it having `FV3` and
# `Order` columns.
core_genes_blastp <- core_genes %>%
  left_join(
    top_blastp %>%
      select(qseqid, TSIV_ORF, sseqid, virus_accession) %>%
      # One row per TSIV query, one column per virus accession
      pivot_wider(
        id_cols = c("qseqid", "TSIV_ORF"),
        names_from = virus_accession,
        values_from = sseqid
      ) %>%
      # Join key: the text before the first "(" in the FV3_AY548484 hit id;
      # the original column is kept alongside the new `FV3` column
      separate(
        FV3_AY548484,
        into = c("FV3"),
        sep = "\\(",
        remove = FALSE,
        extra = "drop"
      ),
    by = "FV3"
  ) %>%
  # Drop rows without an Order value and exclude TSIV ORFs 82 and 29
  # NOTE(review): the reason for excluding ORFs 82 and 29 is not recorded
  # here - confirm and document it
  filter(!is.na(Order), !TSIV_ORF %in% c(82, 29)) %>%
  arrange(Order)
core_genes_blastp
# Save the core gene table as a tab-separated file
write_tsv(core_genes_blastp, core_gene_outfile)
# TSIV-only view of the core gene table: the ordering and gene name columns
# plus the matching TSIV ORF and its query sequence id
TSIV_core_genes_blastp <- select(
  core_genes_blastp,
  Order, `Gene Name`, TSIV_ORF, qseqid
)
TSIV_core_genes_blastp